package com.zyh.day05

import com.zyh.day04.User
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
 * Demo entry point: loads whitespace-delimited user records from a text file into a
 * typed `Dataset[User]`, registers it as the temporary view `t_user`, and queries it
 * with Spark SQL.
 */
object DatasetTest {

  /** Input location used when no path is supplied on the command line. */
  private val DefaultInputPath = "file:///D:/users.txt"

  /**
   * Runs the demo against a local Spark session.
   *
   * Each non-blank input line is split on whitespace and must provide at least four
   * fields; fields 0 and 2 are parsed as `Int`, fields 1 and 3 are passed through as
   * strings to the `User` constructor. A malformed non-blank line still fails the job.
   *
   * @param args optional; when present, `args(0)` overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // 1. Create the SparkSession
    val spark: SparkSession = SparkSession.builder()
      .appName("dst")
      .master("local[*]").getOrCreate()

    // Everything that can throw runs inside try so the session is always released.
    try {
      // 2. Read the data source
      val rdd: RDD[String] = spark.sparkContext.textFile(inputPath)
      val userRdd: RDD[User] = rdd
        .map(_.trim)         // leading whitespace would otherwise produce an empty first field
        .filter(_.nonEmpty)  // blank lines carry no record and would break the Int parsing
        .map(_.split("\\s+"))
        .map(fields => User(fields(0).toInt, fields(1), fields(2).toInt, fields(3)))
      import spark.implicits._
      val ds: Dataset[User] = userRdd.toDS()

      // 3. Register the Dataset as a temporary view
      ds.createOrReplaceTempView("t_user")

      // 4. Run the SQL query
      // NOTE(review): assumes `User` exposes fields named `name` and `age` — confirm
      // against com.zyh.day04.User.
      val sql =
        """
          |select name,age
          |from t_user
          |""".stripMargin
      val result: DataFrame = spark.sql(sql)

      // 5. Process the result set
      result.show()
    } finally {
      // 6. Release resources, even when reading, parsing or the query failed
      spark.close()
    }
  }
}
